Faster R-CNN Setup

We first add Faster R-CNN to our PYTHONPATH. Faster-RCNN will use a pretrained neural net that was trained on IMAGENET. We leverage this pre-existing model (VGG16) to extract bounding boxes for our potential classes. As mentioned in our paper, this pre-trained image net relies on labeled data, so effectively, we're leveraging it in order to make a more finely tuned dataset. However, if we want to recognize completely novel objects, we can simply use DeepBox - that is, we don't need to have ANY prior dataset. We emphasize this in our paper as well.


In [1]:
import os
import sys

#Set the correct environment variables
os.environ['SNORKELHOME']='/home/thomas/snorkel'
os.environ['PYTHONPATH']=':/home/thomas/snorkel:/home/thomas/snorkel/treedlib:/home/thomas/snorkel:/home/thomas/snorkel/treedlib'
os.environ['PATH']='/home/thomas/bin:/home/thomas/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/thomas/snorkel:/home/thomas/snorkel/treedlib:/home/thomas/snorkel:/home/thomas/snorkel/treedlib'

#Add python to the system path so that python can find the package
sys.path.append('/home/thomas/snorkel')
sys.path.append('/home/thomas/snorkel/treedlib')

#Add R-CNN Tools to the system path -> This in turn adds the correct R-CNN paths, see _init_paths.py in the Tools folder
sys.path.append('/home/thomas/py-faster-rcnn/tools')

In [2]:
#From the demo py-faster-rcnn script <- Using a pre-trained neural network

import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from fast_rcnn.nms_wrapper import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, sys, cv2
import argparse

CLASSES = ('__background__',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat', 'chair',
           'cow', 'diningtable', 'dog', 'horse',
           'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor')

NETS = {'vgg16': ('VGG16',
                  'VGG16_faster_rcnn_final.caffemodel'),
        'zf': ('ZF',
                  'ZF_faster_rcnn_final.caffemodel')}


def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    ax.set_title(('{} detections with '
                  'p({} | box) >= {:.1f}').format(class_name, class_name,
                                                  thresh),
                  fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    
def filter_detections(im, dets, thresh=0.5):
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    return dets[inds]

def detectAndDrawObjects(net, image):
    """Detect object classes in an image using pre-computed object proposals."""
    
    # Load the demo image
    if(type(image) is str): #If given a string image path name, read in the image
        im = cv2.imread(image)
    else: #Otherwise, assume that image is the actual image
        im = image

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[6:8]): #We only care about the car and bus classes
        cls_ind += 6 # because we start at the 6th class
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        vis_detections(im, cls, dets, thresh=CONF_THRESH)
        
def detectAndReturnObjects(net, image):
    """Detect object classes in an image using pre-computed object proposals."""
    
    # Load the demo image
    if(type(image) is str): #If given a string image path name, read in the image
        im = cv2.imread(image)
    else: #Otherwise, assume that image is the actual image
        im = image
        
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    
    boxesArray = np.empty((0,5), float)
    
    for cls_ind, cls in enumerate(CLASSES[6:8]): #We only care about the car and bus classes
        cls_ind += 6 # because we start at the 6th class
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        
        tempBoxes = filter_detections(im, dets, thresh=CONF_THRESH)
        if tempBoxes is not None:
            boxesArray = np.vstack((boxesArray, tempBoxes))

    return boxesArray

In [4]:
cfg.TEST.HAS_RPN = True  # Use RPN for proposals

prototxt = os.path.join(cfg.MODELS_DIR, NETS['vgg16'][0],
                        'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                          NETS['vgg16'][1])

if not os.path.isfile(caffemodel):
    raise IOError(('{:s} not found.\nDid you run ./data/script/'
                   'fetch_faster_rcnn_models.sh?').format(caffemodel))

#Train on GPU ID 0 <- Only works if you've configured CUDA and your GPU correctly
caffe.set_mode_gpu()
caffe.set_device(0)
cfg.GPU_ID = 0
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

print '\n\nLoaded network {:s}'.format(caffemodel)

#Take our test images and run R-CNN on them
im_path = '/home/thomas/pixy/Test_Cars/'
im_names = ['1.png', '2.png', '3.png']
im_names = [im_path + tempIm for tempIm in im_names]

for im_name in im_names:
    detectAndDrawObjects(net, im_name)

plt.show()



Loaded network /home/thomas/py-faster-rcnn/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel
Detection took 0.306s for 300 object proposals
Detection took 0.152s for 300 object proposals
Detection took 0.155s for 300 object proposals

Now let's just get the boxes out. Each box entry will have the format $\begin{bmatrix} x1 & y1 & x2 & y2 & score \end{bmatrix}$ where $0 \leq score \leq 1$.


In [5]:
import scipy.misc

cfg.TEST.HAS_RPN = True  # Use RPN for proposals

prototxt = os.path.join(cfg.MODELS_DIR, NETS['vgg16'][0],
                        'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                          NETS['vgg16'][1])

if not os.path.isfile(caffemodel):
    raise IOError(('{:s} not found.\nDid you run ./data/script/'
                   'fetch_faster_rcnn_models.sh?').format(caffemodel))

#Train on GPU ID 0 <- Only works if you've configured CUDA and your GPU correctly
caffe.set_mode_gpu()
caffe.set_device(0)
cfg.GPU_ID = 0
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

print '\n\nLoaded network {:s}'.format(caffemodel)

#Take our test images and run R-CNN on them
im_path = '/home/thomas/pixy/Test_Cars/'
im_names = ['1.png', '2.png', '3.png']
im_names = [im_path + tempIm for tempIm in im_names]
output_path = '/home/thomas/pixy/Test_Cars_Output/'
outputCounter = 0

#For some reason, cv2 just won't write the images if the directory doesn't exist
if not os.path.isdir(output_path):
    os.mkdir(output_path)

for im_name in im_names:
    boxes = detectAndReturnObjects(net, im_name)
    
    for box in boxes:
        im = cv2.imread(im_name)
        box = [int(num) for num in box]
        boxIm = im[box[1]:box[3], box[0]:box[2],:]
        
        #Write the segmented output
        cv2.imwrite(output_path + str(outputCounter) + '.jpg',boxIm)
        outputCounter += 1
        
        boxIm = cv2.cvtColor(boxIm, cv2.COLOR_BGR2RGB) #matplotlib uses a different storage format than cv2, this is just for displaying images
        plt.imshow(boxIm) #Rows of the image are the y axis, columns are the x axis
        plt.axis('off')
        plt.tight_layout()
        plt.show()



Loaded network /home/thomas/py-faster-rcnn/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel

R-CNN does a great job extracting the cars from our image. Now, let's run it on the real dataset. We'll be using images from the CityScapes dataset. Unfortunately, due to licensing issues, we can't include the CityScapes dataset in our repository (also because we can't upload 10 GB of images to GitHub). To gain access to the dataset, register at https://www.cityscapes-dataset.com/downloads/ and place the training images in the PIXY_PATH/CityScapes/ folder. The training images look extremely similar to the three images provided in the Test_Cars folder.


In [6]:
import scipy.misc
from tqdm import tqdm
from os import listdir
from os.path import isfile, join

cfg.TEST.HAS_RPN = True  # Use RPN for proposals

prototxt = os.path.join(cfg.MODELS_DIR, NETS['vgg16'][0],
                        'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                          NETS['vgg16'][1])

if not os.path.isfile(caffemodel):
    raise IOError(('{:s} not found.\nDid you run ./data/script/'
                   'fetch_faster_rcnn_models.sh?').format(caffemodel))

#Train on GPU ID 0 <- Only works if you've configured CUDA and your GPU correctly
caffe.set_mode_gpu()
caffe.set_device(0)
cfg.GPU_ID = 0
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

print '\n\nLoaded network {:s}'.format(caffemodel)

#Take our test images and run R-CNN on them
im_path = '/home/thomas/pixy/CityScapes/'
im_names = onlyfiles = [f for f in listdir(im_path) if isfile(join(im_path, f))]
im_names = [im_path + tempIm for tempIm in im_names]
output_path = '/home/thomas/pixy/CityScapes_Output/'
outputCounter = 0

#For some reason, cv2 just won't write the images if the directory doesn't exist
if not os.path.isdir(output_path):
    os.mkdir(output_path)

for im_name in tqdm(im_names):
    boxes = detectAndReturnObjects(net, im_name)
    
    for box in boxes:
        im = cv2.imread(im_name)
        box = [int(num) for num in box]
        boxIm = im[box[1]:box[3], box[0]:box[2],:]
        
        #Write the segmented output
        cv2.imwrite(output_path + str(outputCounter) + '.jpg',boxIm)
        outputCounter += 1


  0%|          | 0/2975 [00:00<?, ?it/s]

Loaded network /home/thomas/py-faster-rcnn/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel
100%|██████████| 2975/2975 [16:07<00:00,  3.54it/s]

In [7]:
print("Segmented " + str(outputCounter) + " cars!")


Segmented 7608 cars!

Visual Bag of Words

For each of our classes, we compute the SIFT features and k-means cluster in order to get our "visual words". A great overview of this method:: http://vision.ucla.edu/~vedaldi/assets/pubs/vedaldi10vlfeat-tutorial.pdf

1) First we provide the directory of our images and split our dataset into train and testing data.


In [3]:
#Import necessary libraries, following is based on the PHOW script, with modifications
import phow_caltech101 as phow
from datetime import datetime
from os.path import exists
from sklearn.kernel_approximation import AdditiveChi2Sampler
from cPickle import dump, load
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score
import pylab as pl
import numpy as np
import matplotlib.pyplot as plt

#Define initial configuration setup variables
IMAGEDIRECTORY = 'CityScapes_Output_Labels' #ATTENTION: SET IMAGE DIRECTORY HERE, MUST BE IN SAME FOLDER AS NOTEBOOK
IDENTIFIER = 'cityscape1' #The identifier is a string that we use to cache our datasets with - using an identifier allows us to retrieve things from our cache later
OVERWRITE = True #Ignore the cache?
NUMTRAIN = 25 #From each directory, how many images do we want to train with to find our visual words
NUMTEST = 0 #From each directory, how many images do we want to use to test how good our visual words clustering is
NUMCLASSES = 4 #How many folders do we have, aka how many visual word clusterings do we have to do
NUMWORDS = 600 #How many words do we want to use to describe our image classes

conf = phow.Configuration(IDENTIFIER)
conf.setImagePath(IMAGEDIRECTORY)
conf.setNumTrain(NUMTRAIN)
conf.setNumTest(NUMTEST)
conf.setNumClasses(NUMCLASSES)
conf.setNumWords(NUMWORDS)

print str(datetime.now()) + '| Finished configuring system'

classes = phow.get_classes(conf.calDir, conf.numClasses)

model = phow.Model(classes, conf)

all_images, all_images_class_labels = phow.get_all_images(classes, conf)
selTrain, selTest = phow.create_split(all_images, conf)

print str(datetime.now()) + '| Found classes and created split'


2016-12-10 20:23:48.155656| Finished configuring system
2016-12-10 20:23:48.156441| Found classes and created split

We now use our selTrain data set and find our visual bag of words, we've specified to find 600 visual words.


In [4]:
##################
# Train vocabulary
##################
print str(datetime.now()) + '| Start training vocabulary - launching threads to do SIFT'
if (not exists(conf.vocabPath)) | OVERWRITE:
    vocab = phow.trainVocab(selTrain, all_images, conf)
    phow.savemat(conf.vocabPath, {'vocab': vocab})
else:
    print str(datetime.now()) + '| Done! Using old vocab from ' + conf.vocabPath
    vocab = phow.loadmat(conf.vocabPath)['vocab']

model.vocab = vocab #The columns of vocab are our visual words


2016-12-10 20:23:49.438075| Start training vocabulary - launching threads to do SIFT
2016-12-10 20:23:49.438195| Done! Using old vocab from tempresults/cityscape1-vocab.py.mat

Calculate amount of "contribution" each word has to a specific image. (Imagine that the vocab is a set of singular vectors and we're computing our singular values.)

First, we pull out the SIFT vector from each image.


In [6]:
############################
# Compute spatial histograms
############################
print str(datetime.now()) + '| Computing Spatial Histograms'
if (not exists(conf.histPath)) | OVERWRITE:
    hists = phow.computeHistograms(all_images, model, conf, vocab)
    phow.savemat(conf.histPath, {'hists': hists})
else:
    print str(datetime.now()) + '| Found old historams at:' + conf.histPath
    hists = phow.loadmat(conf.histPath)['hists']


2016-12-10 20:23:51.107821| Computing Spatial Histograms
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    5.9s
[Parallel(n_jobs=8)]: Done  56 tasks      | elapsed:   15.8s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   30.6s remaining:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   30.6s finished

After that, we use the AdditiveChi2Sampler from sci-kit learn to do a feature mapping (analogously calculating the singular values).


In [7]:
#####################
# Compute feature map
#####################
print str(datetime.now()) + '| Computing Feature Map'
transformer = AdditiveChi2Sampler()
histst = transformer.fit_transform(hists)
train_data = histst[selTrain]
test_data = histst[selTest]


2016-12-10 20:24:35.433906| Computing Feature Map

Fit our word vectors with the Logistic Regression Model using 1, 2, 3, and 4 as our class labels -> corresponding to Bus, Sedan, SUV, and Van.


In [8]:
y = [1] * 25
y = y + [2] * 25
y = y + [3] * 25
y = y + [4] * 25

In [9]:
from sklearn import linear_model

logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(train_data, y)

logreg.predict_proba(train_data[40].reshape(1,-1))


Out[9]:
array([[  3.09279993e-05,   9.99866188e-01,   1.00075198e-04,
          2.80867582e-06]])

As expected, when given a picture from our training dataset of a Bus, Logistic Regression classifies it as a bus with .99966 percent probability. Note that logistic regression normalizes the probabilities such that the class label probabilities sum to 1.

Now, let's do the visual bag of words on our testing dataset. First, we have to extract the cars from our test dataset.


In [50]:
import scipy.misc
from tqdm import tqdm
from os import listdir
from os.path import isfile, join

cfg.TEST.HAS_RPN = True  # Use RPN for proposals

prototxt = os.path.join(cfg.MODELS_DIR, NETS['vgg16'][0],
                        'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                          NETS['vgg16'][1])

if not os.path.isfile(caffemodel):
    raise IOError(('{:s} not found.\nDid you run ./data/script/'
                   'fetch_faster_rcnn_models.sh?').format(caffemodel))

#Train on GPU ID 0 <- Only works if you've configured CUDA and your GPU correctly
caffe.set_mode_gpu()
caffe.set_device(0)
cfg.GPU_ID = 0
net = caffe.Net(prototxt, caffemodel, caffe.TEST)

print '\n\nLoaded network {:s}'.format(caffemodel)

#Take our test images and run R-CNN on them
im_path = '/home/thomas/pixy/CityScapes_Test/'
im_names = onlyfiles = [f for f in listdir(im_path) if isfile(join(im_path, f))]
im_names = [im_path + tempIm for tempIm in im_names]
output_path = '/home/thomas/pixy/CityScapes_Test_Output/'
outputCounter = 0

#For some reason, cv2 just won't write the images if the directory doesn't exist
if not os.path.isdir(output_path):
    os.mkdir(output_path)

for im_name in tqdm(im_names):
    boxes = detectAndReturnObjects(net, im_name)
    
    for box in boxes:
        im = cv2.imread(im_name)
        box = [int(num) for num in box]
        boxIm = im[box[1]:box[3], box[0]:box[2],:]
        
        #Write the segmented output
        cv2.imwrite(output_path + str(outputCounter) + '.jpg',boxIm)
        outputCounter += 1


  0%|          | 0/2025 [00:00<?, ?it/s]

Loaded network /home/thomas/py-faster-rcnn/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel
100%|██████████| 2025/2025 [11:52<00:00,  3.70it/s]

In [51]:
print("Segmented " + str(outputCounter) + " cars!")


Segmented 6200 cars!

Skip to here if you've already segemented your testing set:


In [12]:
from os import listdir
from os.path import isfile, join

output_path = '/home/thomas/pixy/CityScapes_Test_Output/'

out_names = onlyfiles = [f for f in listdir(output_path) if isfile(join(output_path, f))]
out_names = ['CityScapes_Test_Output/' + tempIm for tempIm in out_names]

In [13]:
tempHistPath = 'tempresults/cityscape1output-hists.py.mat'
OVERWRITE = True #We really don't want to ignore the cache for this, it takes a very long time to compute

In [14]:
############################
# Compute spatial histograms
############################
print str(datetime.now()) + '| Computing Spatial Histograms'
if (not exists(tempHistPath)) | OVERWRITE:
    out_hists = phow.computeHistograms(out_names, model, conf, vocab)
    phow.savemat(tempHistPath, {'hists': out_hists})
else:
    print str(datetime.now()) + '| Found old historams at:' + tempHistPath
    out_hists = phow.loadmat(tempHistPath)['hists']


2016-12-10 20:24:51.505217| Computing Spatial Histograms
[Parallel(n_jobs=8)]: Done  20 tasks      | elapsed:    3.4s
[Parallel(n_jobs=8)]: Done  72 tasks      | elapsed:   11.5s
[Parallel(n_jobs=8)]: Done 129 tasks      | elapsed:   20.4s
[Parallel(n_jobs=8)]: Done 201 tasks      | elapsed:   30.3s
[Parallel(n_jobs=8)]: Done 289 tasks      | elapsed:   45.3s
[Parallel(n_jobs=8)]: Done 393 tasks      | elapsed:  1.0min
[Parallel(n_jobs=8)]: Done 513 tasks      | elapsed:  1.4min
[Parallel(n_jobs=8)]: Done 649 tasks      | elapsed:  1.7min
[Parallel(n_jobs=8)]: Done 801 tasks      | elapsed:  2.1min
[Parallel(n_jobs=8)]: Done 969 tasks      | elapsed:  2.6min
[Parallel(n_jobs=8)]: Done 1153 tasks      | elapsed:  3.1min
[Parallel(n_jobs=8)]: Done 1353 tasks      | elapsed:  3.7min
[Parallel(n_jobs=8)]: Done 1569 tasks      | elapsed:  4.3min
[Parallel(n_jobs=8)]: Done 1801 tasks      | elapsed:  5.0min
[Parallel(n_jobs=8)]: Done 2049 tasks      | elapsed:  5.7min
[Parallel(n_jobs=8)]: Done 2313 tasks      | elapsed:  6.4min
[Parallel(n_jobs=8)]: Done 2593 tasks      | elapsed:  7.2min
[Parallel(n_jobs=8)]: Done 2889 tasks      | elapsed:  8.0min
[Parallel(n_jobs=8)]: Done 3201 tasks      | elapsed:  8.9min
[Parallel(n_jobs=8)]: Done 3529 tasks      | elapsed:  9.9min
[Parallel(n_jobs=8)]: Done 3873 tasks      | elapsed: 10.8min
[Parallel(n_jobs=8)]: Done 4233 tasks      | elapsed: 11.7min
[Parallel(n_jobs=8)]: Done 4609 tasks      | elapsed: 12.9min
[Parallel(n_jobs=8)]: Done 5001 tasks      | elapsed: 14.0min
[Parallel(n_jobs=8)]: Done 5409 tasks      | elapsed: 15.2min
[Parallel(n_jobs=8)]: Done 5833 tasks      | elapsed: 16.3min
[Parallel(n_jobs=8)]: Done 6200 out of 6200 | elapsed: 17.3min finished

In [15]:
#####################
# Compute feature map
#####################
print str(datetime.now()) + '| Computing Feature Map'
transformer = AdditiveChi2Sampler()
out_histst = transformer.fit_transform(out_hists)


2016-12-10 20:46:30.228778| Computing Feature Map

Out class labels are in the form $\begin{bmatrix} P(Bus|Features) & P(Sedan|Features) & P(SUV|Features) & P(Van|Features) \end{bmatrix}$.


In [16]:
logreg.decision_function(out_histst[0].reshape((1,-1)))


Out[16]:
array([[-16.14175298,   0.64812159,  -0.03645473,  -7.9996721 ]])

In [17]:
logreg.predict_proba(out_histst[0].reshape((1,-1)))


Out[17]:
array([[  8.50855348e-08,   5.72034643e-01,   4.27673011e-01,
          2.92260995e-04]])

In [18]:
out_proba = np.empty((0,4), float)

for out_hist in out_histst:
    out_proba = np.vstack((out_proba, logreg.predict_proba(out_hist.reshape(1,-1))))

print(out_proba)


[[  8.50855348e-08   5.72034643e-01   4.27673011e-01   2.92260995e-04]
 [  7.63422631e-04   7.02903436e-04   9.98349174e-01   1.84500170e-04]
 [  1.53968925e-02   3.55315097e-03   9.80337285e-01   7.12671257e-04]
 ..., 
 [  7.18620202e-02   3.74637245e-03   9.24309666e-01   8.19413858e-05]
 [  1.63482686e-05   8.82040689e-01   1.17942681e-01   2.82166680e-07]
 [  3.11203657e-02   1.67834902e-01   8.00852605e-01   1.92127411e-04]]

We toggle a label (1) if there is more than a 20 percent chance of it being classified as that class.


In [19]:
out_labels_idx = np.argwhere(out_proba>= .20)

print(out_labels_idx)


[[   0    1]
 [   0    2]
 [   1    2]
 ..., 
 [6197    2]
 [6198    1]
 [6199    2]]

In [20]:
out_labels = np.zeros(out_proba.shape, float)
for entry in out_labels_idx:
    out_labels[entry[0], entry[1]] = 1
    
print(out_labels)
print(out_labels.shape)


[[ 0.  1.  1.  0.]
 [ 0.  0.  1.  0.]
 [ 0.  0.  1.  0.]
 ..., 
 [ 0.  0.  1.  0.]
 [ 0.  1.  0.  0.]
 [ 0.  0.  1.  0.]]
(6200, 4)

Now that we have our overall labeling matrix, let's feed it into snorkel.

Snorkel Installation and Setup

First, let's load snorkel into our Jupyter python path. For some reason, the snorkel installation doesn't always work so this is here as a backup. Make sure that you follow the installation instructions in the README as well before executing any code.


In [21]:
import os
import sys

#Set the correct environment variables
os.environ['SNORKELHOME']='/home/thomas/snorkel'
os.environ['PYTHONPATH']=':/home/thomas/snorkel:/home/thomas/snorkel/treedlib:/home/thomas/snorkel:/home/thomas/snorkel/treedlib'
os.environ['PATH']='/home/thomas/bin:/home/thomas/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/thomas/snorkel:/home/thomas/snorkel/treedlib:/home/thomas/snorkel:/home/thomas/snorkel/treedlib'

#Add python to the system path so that python can find the package
sys.path.append('/home/thomas/snorkel')
sys.path.append('/home/thomas/snorkel/treedlib')

In [22]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
from snorkel import SnorkelSession
session = SnorkelSession()

Fitting the Generative Model

We estimate the accuracies of the labeling functions without supervision. Specifically, we estimate the parameters of a NaiveBayes generative model.

First, we have to specify a sparse matrix with labeling function output. The setup of the matrix is as follows:

Rows of the matrix correspond with individual test images Columns of the matrix correspond to individual labeling functions Entries in the matrix are {-1, 0, 1}, the possible outputs of each labeling function

We first fit our generative model to the first class label (the first column). Therefore, we negate the output in the 2nd to 4th columns.


In [75]:
out_labels


Out[75]:
array([[-0.,  1.,  0.,  0.],
       [-0.,  0.,  0.,  0.],
       [-0.,  0.,  0.,  0.],
       ..., 
       [-0.,  0.,  0.,  0.],
       [-0.,  1.,  0.,  0.],
       [-0.,  0.,  0.,  0.]])

In [130]:
bus_labels = out_labels
bus_labels[:,1] = np.negative(out_labels[:,1])
bus_labels[:,2] = np.negative(out_labels[:,2])
bus_labels[:,3] = np.negative(out_labels[:,3])
print(bus_labels)


[[-0. -1.  0.  0.]
 [-0. -0.  0.  0.]
 [-0. -0.  0.  0.]
 ..., 
 [-0. -0.  0.  0.]
 [-0. -1.  0.  0.]
 [-0. -0.  0.  0.]]

In [131]:
#Convert our sparse array into a format that snorkel knows how to deal with
from snorkel.annotations import csr_LabelMatrix
from snorkel.annotations import csr_AnnotationMatrix

bus_label_matrix = csr_LabelMatrix(csr_AnnotationMatrix(bus_labels))
bus_label_matrix


Out[131]:
<6200x4 sparse matrix of type '<type 'numpy.float64'>'
	with 5911 stored elements in Compressed Sparse Row format>

In [132]:
from snorkel.learning import NaiveBayes

gen_model = NaiveBayes()
gen_model.train(bus_label_matrix, n_iter=1000, rate=1e-5)

gen_model.w


================================================================================
Training marginals (!= 0.5):	6200
Features:			4
================================================================================
Begin training for rate=1e-05, mu=1e-06
	Learning epoch = 0	Gradient mag. = 0.415618
	Learning epoch = 250	Gradient mag. = 0.437259
	Learning epoch = 500	Gradient mag. = 0.437026
	Learning epoch = 750	Gradient mag. = 0.436793
Final gradient magnitude for rate=1e-05, mu=1e-06: 0.437
Out[132]:
array([ 0.99416702,  0.99817419,  0.99818156,  0.99818156])

In [133]:
train_marginals = gen_model.marginals(bus_label_matrix)

train_marginals


Out[133]:
array([ 0.26930055,  0.5       ,  0.5       , ...,  0.5       ,
        0.26930055,  0.5       ])

In [134]:
from snorkel.learning import LogReg
from snorkel.learning_utils import RandomSearch, ListParameter, RangeParameter

iter_param = ListParameter('n_iter', [250, 500, 1000, 2000])
rate_param = RangeParameter('rate', 1e-4, 1e-2, step=0.75, log_base=10)
reg_param  = RangeParameter('mu', 1e-8, 1e-2, step=1, log_base=10)

disc_model = LogReg()

In [135]:
searcher = RandomSearch(disc_model, bus_label_matrix, train_marginals, 10, iter_param, rate_param, reg_param)

In [136]:
disc_model.train(bus_label_matrix, train_marginals, n_iter=1000, rate=0.001)


================================================================================
Training marginals (!= 0.5):	2987
Features:			4
================================================================================
Using gradient descent...
	Learning epoch = 0	Step size = 0.001
	Loss = 2070.430628	Gradient magnitude = 610.997400
	Learning epoch = 100	Step size = 0.000904792147114
	Loss = 1757.733500	Gradient magnitude = 0.002778
	Learning epoch = 200	Step size = 0.000818648829479
	Loss = 1757.733500	Gradient magnitude = 0.003070
	Learning epoch = 300	Step size = 0.000740707032156
	Loss = 1757.733500	Gradient magnitude = 0.003392
	Learning epoch = 400	Step size = 0.000670185906007
	Loss = 1757.733500	Gradient magnitude = 0.003748
	Learning epoch = 500	Step size = 0.000606378944861
	Loss = 1757.733500	Gradient magnitude = 0.004141
	Learning epoch = 600	Step size = 0.000548646907485
	Loss = 1757.733500	Gradient magnitude = 0.004575
	Learning epoch = 700	Step size = 0.000496411413431
	Loss = 1757.733500	Gradient magnitude = 0.005054
	Learning epoch = 800	Step size = 0.00044914914861
	Loss = 1757.733500	Gradient magnitude = 0.005584
	Learning epoch = 900	Step size = 0.000406386622545
	Loss = 1757.733500	Gradient magnitude = 0.006168

In [137]:
out_hists


Out[137]:
array([[  3.01204826e-04,   3.76506032e-05,   1.24246988e-03, ...,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   2.92760832e-03, ...,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  6.09013427e-04,   4.56760055e-04,   0.00000000e+00, ...,
          1.52253357e-04,   0.00000000e+00,   0.00000000e+00],
       ..., 
       [  3.21686617e-04,   1.23725622e-05,   3.83549428e-04, ...,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  1.88679245e-04,   4.71698120e-04,   5.66037721e-04, ...,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00]], dtype=float32)

In [138]:
denoisedMatrix = disc_model.predict(bus_label_matrix)

In [139]:
onlyBus = np.where(denoisedMatrix == 1)[0]

In [140]:
len(onlyBus)


Out[140]:
1378

In [141]:
92.0/1378


Out[141]:
0.06676342525399129

In [121]:
onlyVan = np.where(denoisedMatrix == 1)[0]

In [171]:
8.0/1609


Out[171]:
0.004972032318210068

In [122]:
len(onlyVan)


Out[122]:
1609

In [124]:
onlyVan[1:200]


Out[124]:
array([  4,   6,  12,  14,  18,  22,  24,  26,  27,  28,  35,  38,  40,
        43,  46,  53,  56,  57,  58,  66,  75,  76,  78,  82,  88,  99,
       104, 108, 111, 113, 115, 117, 129, 134, 135, 136, 138, 141, 142,
       143, 147, 149, 151, 153, 156, 162, 166, 167, 176, 178, 179, 188,
       195, 197, 200, 201, 204, 206, 212, 213, 214, 223, 228, 232, 233,
       235, 236, 243, 245, 247, 251, 252, 256, 262, 266, 276, 278, 279,
       280, 285, 286, 290, 302, 319, 324, 327, 331, 336, 338, 339, 343,
       345, 348, 349, 350, 358, 364, 365, 366, 367, 376, 384, 386, 389,
       400, 406, 408, 420, 424, 436, 440, 441, 444, 446, 448, 449, 450,
       451, 458, 459, 461, 465, 466, 469, 470, 471, 475, 477, 478, 492,
       493, 496, 498, 504, 505, 510, 511, 516, 518, 519, 522, 526, 532,
       536, 538, 541, 544, 552, 553, 556, 558, 559, 561, 563, 564, 568,
       573, 578, 580, 586, 593, 595, 602, 604, 606, 613, 614, 618, 619,
       624, 625, 634, 641, 643, 646, 647, 651, 664, 673, 677, 683, 687,
       691, 695, 696, 698, 700, 702, 703, 715, 716, 719, 720, 722, 727,
       730, 732, 733, 734])

In [103]:
onlySUV = np.where(denoisedMatrix == 1)[0]

In [104]:
len(onlySUV)


Out[104]:
1378

In [105]:
onlySUV[1:100]


Out[105]:
array([ 11,  19,  23,  29,  31,  32,  33,  34,  51,  54,  67,  77,  79,
        85,  90,  94, 102, 105, 107, 118, 130, 132, 145, 148, 150, 152,
       159, 168, 169, 180, 181, 185, 196, 198, 199, 203, 208, 209, 215,
       217, 220, 225, 226, 229, 237, 239, 248, 257, 258, 259, 260, 261,
       263, 270, 273, 277, 281, 284, 287, 295, 298, 299, 300, 305, 306,
       308, 309, 310, 315, 316, 317, 318, 322, 323, 326, 333, 335, 337,
       340, 341, 344, 346, 351, 353, 370, 371, 372, 385, 391, 393, 396,
       401, 402, 404, 410, 417, 418, 421, 423])

In [129]:
937.0 / 1378


Out[129]:
0.6799709724238027

In [102]:
onlyCars = np.where(denoisedMatrix == 1)[0]

In [90]:
len(onlyCars)


Out[90]:
2987

In [142]:
2702.0/2987


Out[142]:
0.904586541680616

In [58]:
out_proba[277]


Out[58]:
array([ 0.74144286,  0.16875729,  0.08773829,  0.00206156])

In [147]:
MLELabels = [np.argmax(tempProb) for tempProb in out_proba]

In [153]:
BusArray = np.where(np.asarray(MLELabels) == 1)[0]
SedanArray = np.where(np.asarray(MLELabels) == 2)[0]
SUVArray = np.where(np.asarray(MLELabels) == 3)[0]
VanArray = np.where(np.asarray(MLELabels) == 4)[0]

In [154]:
len(SedanArray)


Out[154]:
3341

In [155]:
len(BusArray)


Out[155]:
1368

In [165]:
len(SUVArray)


Out[165]:
604

In [168]:
len(VanArray)


Out[168]:
0

In [157]:
SedanArray[0:100]


Out[157]:
array([  1,   2,   4,   5,   7,   8,  15,  16,  17,  19,  20,  21,  27,
        28,  30,  36,  37,  39,  41,  42,  44,  45,  46,  47,  48,  49,
        50,  52,  55,  58,  59,  61,  62,  63,  64,  68,  69,  70,  74,
        76,  80,  81,  83,  84,  87,  89,  91,  92,  93,  95,  96,  97,
        98, 101, 103, 105, 106, 109, 110, 112, 114, 116, 119, 120, 121,
       122, 123, 124, 125, 126, 127, 128, 133, 134, 137, 138, 139, 140,
       144, 146, 153, 155, 158, 159, 160, 161, 163, 164, 170, 171, 172,
       173, 174, 175, 177, 182, 184, 186, 187, 189])

In [163]:
103.0/1368


Out[163]:
0.07529239766081872

In [164]:
2184.0/3341


Out[164]:
0.6536964980544747

In [169]:
202.0/604


Out[169]:
0.3344370860927152

In [ ]: